Consider the cohort as a single group to start. Do not stratify by case-control status.

Stack the heatmaps for CD4. Are there obvious patterns to the antigens?
Stack the heatmaps for NOTCD4. Are there patterns? Are these patterns different from CD4?

Calculate FS/PFS for each stimulation in CD4 and compare side-by-side in boxplots and test by ANOVA. Is there a clear winner? Calculate FS/PFS for each stimulation in NOTCD4 and compare with boxplot and ANOVA. Is there a winner? Is this different than CD4?

It may also make sense to make a corrplot of the FS/PFS from the different stims. This is asking a different question though, i.e. whether a patient’s cytokine responses tend to be consistently elevated/not-elevated across multiple stims.

Regress FS/PFS against age (linear). Is there an effect of age on T cell functionality? Regress FS/PFS against days from symptom onset (linear). Is there a decrease in functionality with time? Stratify FS/PFS by sex and test by t-test (or Mann-Whitney, though sample size is large enough). Is there a difference?

Only then would you start asking the case-control questions.

library(here)
library(tidyverse)
library(COMPASS)
library(ggplot2)
library(data.table)
library(broom)
library(psych)
library(corrplot)
library(ggpubr)
library(knitr)
library(kableExtra)
library(cowplot)
library(purrr)
library(flowWorkspace)
library(GGally)
library(patchwork)
library(RColorBrewer)
library(ComplexHeatmap)
library(ggbeeswarm)
source(here::here("scripts/20200604_Helper_Functions.R"))
save_output <- FALSE

# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

Read in data

# Next 4 lines from Run_COMPASS.R
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))

crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
                                stims_for_compass_runs_rep),
                      .f = function(parent, currentStim) {
                        currentStimForFile <- gsub(" ", "_", currentStim)
                        crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
                        readRDS(crPath)
                      }) %>% 
  set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
##  [1] "4_VEMP"       "NOT4_VEMP"    "8_VEMP"       "4_Spike_1"    "NOT4_Spike_1"
##  [6] "8_Spike_1"    "4_Spike_2"    "NOT4_Spike_2" "8_Spike_2"    "4_NCAP"      
## [11] "NOT4_NCAP"    "8_NCAP"
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")

Stacked heatmaps

plotCOMPASSResultStack(crList[CD4RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[NotCD4RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "Not-CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[CD8RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 and Not-CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, CD8RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 and CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames, CD8RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4, Not-CD4, and CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 7 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&!IL17a&IL2&IL4/5/13&TNFa

Drop assay controls

For this next part, drop the assay controls (healthies & TRIMAs)

# First change any NA Cohort levels to "Healthy"
crList2 <- lapply(crList,
                  function(cr) {
                    cr$data$meta$Cohort <- factor(ifelse(cr$data$meta$Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", cr$data$meta$Cohort),
                                              levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized")))
                    cr
                  })
names(crList2) <- names(crList)

crList.no_healthy <- lapply(names(crList2),
                  function(cr_name) {
                    cr <- crList2[[cr_name]]
                    print(sprintf("Accessing %s", cr_name))
                    cr$data$meta <- cr$data$meta %>% 
                      # Filter out samples which weren't run through COMPASS
                      dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma))))
                    stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
                    
                    # And make sure that the count data only includes counts for the samples which were run through COMPASS
                    stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
                    
                    not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
                    cr$data$meta <- cr$data$meta[not_healthy_idx,] %>% 
                      mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
                    cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]

                    cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
                    cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
                    cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
                    cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
                    
                    cr
                  })
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList2)
# Read in the patient manifest with complete data (though it's also in the COMPASSResult object)
data_manifest <- read.csv(here::here("data/Seshadri_HAARVI_PBMC_manifest_merged_11June2020.csv"), check.names = F, stringsAsFactors = F)

# Remember 116C and 37C didn't get run through COMPASS
# 75C was not run at all
# 07B/551432 didn't get run for Spike 2
# For CD8 only: "BWT20", "15548" didn't get run for "Spike 2", "NCAP", and 15530 didn't get run for "Spike 2"

FS across Stims

fs_pfs_df_with_manifest <- lapply(c(CD4RunNames, CD8RunNames), function(n) {
  as.data.frame(COMPASS::FunctionalityScore(crList[[n]])) %>%
    rownames_to_column("SAMPLE ID") %>% 
    rename(FS = `COMPASS::FunctionalityScore(crList[[n]])`) %>% 
    left_join(as.data.frame(COMPASS::PolyfunctionalityScore(crList[[n]])) %>%
      rownames_to_column("SAMPLE ID") %>% 
      rename(PFS = `COMPASS::PolyfunctionalityScore(crList[[n]])`),
      by = "SAMPLE ID") %>% 
    mutate(COMPASS_run = n)
  } ) %>% 
  data.table::rbindlist() %>% 
  separate(COMPASS_run, into = c("parent", "STIM"), remove = F, extra = "merge") %>% 
  mutate(STIM = factor(recode(STIM, "Spike_1" = "S1", "Spike_2" = "S2", "NCAP" = "N", "VEMP" = "E"),
                       levels = c("S1", "S2", "N", "E"))) %>%
  mutate("Record ID" = dplyr::recode(`SAMPLE ID`,
                                     "HS09" = "HS9")) %>% # also, 75C did not get run
  full_join(data_manifest, by = c("Record ID" = "Record ID")) %>% 
  dplyr::filter(!(`Record ID` %in% c("116C", "37C", "75C"))) %>% 
  mutate(Cohort = factor(ifelse(Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
                                              levels = c("Hospitalized", "Non-hospitalized", "Healthy"))) %>% 
  # drop the healthies anyway
  dplyr::filter(Cohort != "Healthy") %>% 
  mutate(Days_Symptom_Onset_to_Visit_1 = as.numeric(`Days symptom onset to visit 1`)) %>% 
  dplyr::select("SAMPLE ID", "FS", "PFS", "COMPASS_run", "parent", "STIM",
                "Cohort", "Age", "Sex", "Race_v2", "Days_Symptom_Onset_to_Visit_1")

unique(fs_pfs_df_with_manifest$Cohort)
## [1] Hospitalized     Non-hospitalized
## Levels: Hospitalized Non-hospitalized Healthy
fs_pfs_df_with_manifest %>% dplyr::filter(is.na(FS) | is.na(`SAMPLE ID`) | is.na(STIM) | is.na(Cohort))
## Empty data.table (0 rows and 11 cols): SAMPLE ID,FS,PFS,COMPASS_run,parent,STIM...
# Note removal of points for quade.test in order to have complete block design
# CD4: remove 551432/07B 
# And for CD8 need to remove more individuals for complete block design:
# !(`SAMPLE ID` %in% c("BWT20", "15548", "15530"))
# Statistics is calculated after exclusion, but all points are plotted

plot_fs_vs_stim <- function(current_parent) {
  StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
  StimOutlineColors <- c("S1" = "#f0027fdf", "S2" = "#fdc086df", "N" = "#beaed4df", "E" = "#e6e63cdf")
  
  current_data_for_test <- fs_pfs_df_with_manifest %>%
    dplyr::filter(parent == current_parent) %>% 
    pivot_wider(id_cols = c("SAMPLE ID"), names_from = STIM, values_from = FS) %>% 
    # Remove any individuals which don't have data for all 4 stims, for the purposes of having complete data for the test
    na.omit() %>% 
    # For the pairwise wilcox test, make the data long again
    pivot_longer(cols = c(S1, S2, N, E), names_to = "STIM", values_to = "FS") %>% 
    # mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E"))) %>% 
    # It should already be in order, but make sure the SAMPLE IDs are in the same order for each STIM
    arrange(`SAMPLE ID`, STIM)
  quade_test <- quade.test(FS ~ STIM | `SAMPLE ID`, data = current_data_for_test)
  pwt <- pairwise.wilcox.test(current_data_for_test$FS, current_data_for_test$STIM, p.adjust.method = "bonferroni")
  
  # For the plot, don't filter out individuals who don't have data for all 4 stims
  # This may result in inconsistency between the test results and the plot, but it should be pretty similar
  current_data_for_plot <- fs_pfs_df_with_manifest %>%
    dplyr::filter(parent == current_parent) %>% 
    mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E")))
  
  current_plot <- ggplot(current_data_for_plot, aes(STIM, FS)) +
    theme_bw(base_size = 22) +
    geom_hline(yintercept = 0, linetype="dashed", alpha = 0.5) +
    geom_violin(aes(fill = `STIM`), draw_quantiles = c(0.5),
                # All violins have the same maximum width. Overrides default of equal area
                scale="width", width = 0.6) +
    geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
                scale="width", width = 0.6) +
    geom_violin(aes(color=`STIM`), fill="transparent",
              scale="width", width=0.6, size=1.1) +
    geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=27),
          axis.text.y = element_text(color="black", size=20),
          axis.text.x = element_text(color="black", size=26),
          plot.title = element_text(hjust = 0.5, size=29),
          panel.grid = element_blank(),
          legend.position = "none",
          plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
    # scale_x_discrete(expand = c(0.3, 0.3)) +
    labs(y = "Functionality Score",
         title = sprintf("CD%s", current_parent)) +
    scale_fill_manual(values = StimColors) +
    scale_color_manual(values = StimOutlineColors)
  
  list("plot" = current_plot,
       "quade_test" = quade_test,
       "wilcox_test" = pwt)
}

cd4_fs_vs_stim_out <- plot_fs_vs_stim("4")
cd4_fs_vs_stim_out$quade_test$p.value
## [1] 6.408814e-32
ggplot_build(cd4_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.005442067  0.114283406
annotation_df <- broom::tidy(cd4_fs_vs_stim_out$wilcox_test) %>%
  dplyr::filter(p.value < 0.05) %>% 
  mutate(y_pos = c(0.118, 0.145, 0.132, 0.114),
         p.text = if_else(p.value < 0.001, "p<.001", paste0("p=", sub("0.", ".", round(p.value, 3)))))
cd4_fs_vs_stim_plot <- cd4_fs_vs_stim_out$plot +
  coord_cartesian(ylim = c(NA, 0.154)) +
  suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
                                         aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
                                         tip_length = c(0.005, 0.005),
                                         textsize=5,
                                         manual = TRUE))
cd4_fs_vs_stim_plot

cd8_fs_vs_stim_out <- plot_fs_vs_stim("8")
cd8_fs_vs_stim_out$quade_test$p.value
## [1] 5.442791e-08
ggplot_build(cd8_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.002139902  0.044937933
annotation_df <- broom::tidy(cd8_fs_vs_stim_out$wilcox_test) %>%
  dplyr::filter(p.value < 0.05) %>% 
  mutate(y_pos = c(0.061, 0.053, 0.045),
         p.text = if_else(p.value < 0.001, "p<.001", paste0("p=", sub("0.", ".", round(p.value, 3)))))
cd8_fs_vs_stim_plot <- cd8_fs_vs_stim_out$plot +
  coord_cartesian(ylim = c(NA, 0.065)) +
  suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
                                         aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
                                         tip_length = c(0.005, 0.005),
                                         textsize=5,
                                         manual = TRUE))
cd8_fs_vs_stim_plot

# Note that the above data are paired, even though it's represented as violin plots.
if(save_output) {

  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4A_CD4_FS_vs_Stim.pdf"), width=5, height=4,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  cd4_fs_vs_stim_plot
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5F_CD8_FS_vs_Stim.pdf"), width=5, height=4,
      onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  cd8_fs_vs_stim_plot
  dev.off()
}

As we saw in the heatmaps, NCAP and Spike 1 consistently have the highest FS/PFS scores, and VEMP has the lowest.

How correlated are FS/PFS across stims?

fs_pfs_df_wide <- fs_pfs_df_with_manifest %>% 
  dplyr::select("SAMPLE ID", "STIM", "FS", "PFS", "parent") %>% 
 pivot_wider(id_cols = c(`SAMPLE ID`),
             names_from = c(STIM, parent),
             values_from = c(FS, PFS))

cd4_fs_cor_plot <- ggcorr(fs_pfs_df_wide %>% 
         dplyr::select(-c(`SAMPLE ID`)) %>% 
         dplyr::select(matches("^FS.*_4")) %>% 
         rename_all(~ sub(".*_(.*)_.*", "\\1", .)) %>% 
         na.omit(),
       nbreaks=8, label=TRUE, label_size=7, label_color='black',
       legend.position = "none", size=9)
cd4_fs_cor_plot

cd8_fs_cor_plot <- ggcorr(fs_pfs_df_wide %>% 
         dplyr::select(-c(`SAMPLE ID`)) %>% 
         dplyr::select(matches("^FS.*_8")) %>% 
         rename_all(~ sub(".*_(.*)_.*", "\\1", .)) %>% 
         na.omit(),
       nbreaks=8, label=TRUE, label_size=7, label_color='black',
       legend.position = "none", size=9)
cd8_fs_cor_plot

Write out the FS scores to a file for further integration w/ other T cell and Ab data

if(save_output) {
  fs_dat_2save <- fs_pfs_df_wide %>% 
    rename("SAMPLE_ID" = "SAMPLE ID") %>% 
    dplyr::select("SAMPLE_ID" | matches("^FS.*_4") | matches("^FS.*_8")) %>% 
    rename_all(~ sub("Spike_", "S", .)) %>% 
    rename_at(vars(-"SAMPLE_ID"), ~ sub("FS_(.*)_(.*)", "CD\\2_\\1_FS", .)) %>% 
    # Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
    mutate_at(vars(-"SAMPLE_ID"), ~ format(., digits = 20))
  
  write.csv(fs_dat_2save,
            here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), row.names = F)
  # fs_dat_2save <- read.csv(here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), stringsAsFactors = F, check.names = F, colClasses = "character")
  # all.equal(fs_dat_2save,
  #  read.csv(here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), stringsAsFactors = F, check.names = F, colClasses = "character"))
}
# Investigate the odd dichotomization of FS_VEMP_4 values:
FS_VEMP_4_dat <- as.data.frame((COMPASS::FunctionalityScore(crList$`4_VEMP`))) %>%
  rownames_to_column("SAMPLE ID") %>%
  rename(`4_VEMP_FS` = "(COMPASS::FunctionalityScore(crList$`4_VEMP`))") %>% 
  left_join(crList$`4_VEMP`$data$meta %>% dplyr::select(`SAMPLE ID`, Batch, Cohort, Age, Sex, Race_v2, `Days symptom onset to visit 1`, `$DATE`),
            by = "SAMPLE ID") %>% 
  left_join(crList$`4_VEMP`$fit$mean_gamma %>% as.data.frame() %>% rownames_to_column("SAMPLE ID"),
            by = "SAMPLE ID") %>% 
  mutate(Batch = factor(ifelse(`$DATE` == "05-JUN-2020", 3, Batch)),
         Cohort = factor(ifelse(Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
                                              levels = c("Healthy", "Non-hospitalized", "Hospitalized")),
         `Days symptom onset to visit 1` = as.numeric(`Days symptom onset to visit 1`),
         Race_v2 = factor(ifelse(Race_v2 %in%
                                                c(NA, "N/A"), NA, Race_v2),
                                              levels = c("American Indian or Alaska Native", "Asian", "White", NA)))
## Warning: Problem with `mutate()` input `Days symptom onset to visit 1`.
## ℹ NAs introduced by coercion
## ℹ Input `Days symptom onset to visit 1` is `as.numeric(`Days symptom onset to visit 1`)`.
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
# It has everything to do with the IL4/5/13 only subset
ggplot(FS_VEMP_4_dat, aes(`!IL2&IL4/5/13&!IFNg&!TNFa&!IL17a&!CD154&!CD107a`, `4_VEMP_FS`)) +
  geom_point() +
  labs(x = "IL4/5/13 only subset mean_gamma")

# But is there another metadata variable which explains the IL4/5/13 dichotomy?
ggplot(FS_VEMP_4_dat, aes(Batch, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1)

ggplot(FS_VEMP_4_dat, aes(Cohort, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  theme(axis.text.x = element_text(size=10, angle = 20))

ggplot(FS_VEMP_4_dat, aes(Race_v2, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  theme(axis.text.x = element_text(size=10, angle = 20))

ggplot(FS_VEMP_4_dat, aes(Age, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(FS_VEMP_4_dat, aes(`Days symptom onset to visit 1`, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

FS_VEMP_4_dat %>% dplyr::filter(`4_VEMP_FS` > 0.013) %>% dplyr::select(`SAMPLE ID`, Batch, Cohort, `4_VEMP_FS`) %>% arrange(Batch, Cohort, `SAMPLE ID`)
##    SAMPLE ID Batch           Cohort  4_VEMP_FS
## 1       148C     1 Non-hospitalized 0.01574803
## 2        25C     1 Non-hospitalized 0.01580827
## 3        51C     1 Non-hospitalized 0.01574744
## 4      15575     1     Hospitalized 0.01582126
## 5       109C     2 Non-hospitalized 0.01574803
## 6       128C     2 Non-hospitalized 0.01574803
## 7       157C     2 Non-hospitalized 0.01574803
## 8        29C     2 Non-hospitalized 0.01574980
## 9        69C     2 Non-hospitalized 0.01584528
## 10        13     2     Hospitalized 0.01574803
## 11      142C     3 Non-hospitalized 0.01575591
## 12       96C     3 Non-hospitalized 0.01589272
## 13      121C     3     Hospitalized 0.01710177
## 14        23     3     Hospitalized 0.02362205
# These points are in each Batch and are not restricted to a Cohort

Matrix of the p-value of the correlation. Ignore PFS

# mat : is a matrix of data
# ... : further arguments to pass to the native R cor.test function
cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
getOption("na.action") # Default behavior of cor.test
## [1] "na.omit"
# matrix of the p-value of the correlation
# the test statistic is based on Pearson's product moment correlation coefficient cor(x, y)
# The P-value is the probability that you would have found the current result if the correlation coefficient were in fact zero (null hypothesis). If this probability is lower than the conventional 5% (P<0.05) the correlation coefficient is called statistically significant.

col <- colorRampPalette(c("#3B9AB2", "#EEEEEE", "#F21A00"))

# CD4 FS
cd4_fs_cor_dat <- fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(starts_with("FS") & ends_with("_4")) %>% 
           rename_all(~ sub(".*_(.*)_.*", "\\1", .))
p.mat_cd4 <- cor.mtest(cd4_fs_cor_dat)
cd4_M <- cor(cd4_fs_cor_dat,
         use = "complete.obs") # missing values are handled by casewise deletion
cd4_fs_corrplot <- corrplot(cd4_M,
         method="color", col=col(200),  
         type="lower", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.cex = 2, number.cex = 1.5,
         tl.pos = "ld", tl.col="black", tl.srt=0, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat_cd4, sig.level = 0.05, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE, addgrid.col="black", tl.offset = 1, cl.pos="n")

# significant coefficients are colored

# CD8 FS
cd8_fs_cor_dat <- fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(starts_with("FS") & ends_with("_8")) %>% 
           rename_all(~ sub(".*_(.*)_.*", "\\1", .))
p.mat_cd8 <- cor.mtest(cd8_fs_cor_dat)
cd8_M <- cor(cd8_fs_cor_dat,
         use = "complete.obs") # missing values are handled by casewise deletion
cd8_fs_corrplot <- corrplot(cd8_M,
         method="color", col=col(200),  
         type="lower", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.cex = 2, number.cex = 1.5,
         tl.pos = "ld", tl.col="black", tl.srt=0, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat_cd8, sig.level = 0.05, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE, addgrid.col="black", tl.offset = 1, cl.pos="n")

# significant coefficients are colored
if(save_output) {
  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4B_CD4_FS_corr.pdf"), width=4, height=4,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  corrplot(cd4_M,
         method="color", col=col(200),  
         type="lower", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.cex = 2, number.cex = 1.5,
         tl.pos = "ld", tl.col="black", tl.srt=0, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat_cd4, sig.level = 0.05, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE, addgrid.col="black", tl.offset = 1, cl.pos="n")
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5G_CD8_FS_corr.pdf"), width=4, height=4,
      onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  corrplot(cd8_M,
         method="color", col=col(200),  
         type="lower", order="original", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.cex = 2, number.cex = 1.5,
         tl.pos = "ld", tl.col="black", tl.srt=0, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat_cd8, sig.level = 0.05, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE, addgrid.col="black", tl.offset = 1, cl.pos="n")
  dev.off()  
}

Note that FS and PFS scores for the same condition tend to be highly correlated (not shown). Not surprising.

In general, we can’t rely on a particular condition to predict the FS/PFS of all the other conditions. Each STIM and co-receptor subset provides different information. (If the former were true, it might have simplified our upcoming analyses)

Regress FS vs Demographics

FS vs Age

# StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "N" = "#beaed4", "E" = "#e6e63c")
StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
fs_vs_x_plot <- function(current_parent, current_stim, xaxis="Age", include_regression_line = TRUE) {
  current_data <- fs_pfs_df_with_manifest %>% 
    dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
  
  current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
    theme_bw(base_size = 22) +
    geom_point(fill=StimColors[[current_stim]], pch=21, size=2.5) +
    theme(axis.title.x = element_text(size=20),
          axis.title.y = element_text(size=20),
          axis.text.y = element_text(color="black", size=17),
          axis.text.x = element_text(color="black", size=17),
          plot.title = element_text(hjust=0.5, size=21),
          panel.grid = element_blank(),
          legend.position = "none",
          plot.margin = margin(0.3, 0.4, 0.1, 0.2, "cm")) +
    labs(title = current_stim,
         x = sub("([A-Za-z]*)_.*", "\\1", xaxis))
  if(include_regression_line) {
    current_plot <- current_plot + 
    # Linear regression line with 95% CI (based on "standard error of predicted means")
    # By "predicted means", we're talking about the regression line's y-value at each value of x
    # Pretty sure the grey 95% CI is the "confidence interval of the prediction", as discussed here: https://statisticsbyjim.com/glossary/confidence-interval-prediction/
    # So it's not about individual points (a prediction interval), but rather the accuracy of the predicted mean y-value at each x
    # Help understanding CI from ?predict.lm and here: https://stackoverflow.com/questions/45742987/how-is-level-used-to-generate-the-confidence-interval-in-geom-smooth
    geom_smooth(color = "black", method="lm")  +
    # stat_cor from ggpubr
    stat_cor(method = "pearson", size=5.5)
  }
  current_plot
}

parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_age_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot)
names(fs_vs_age_plot_list) <- paste0(stim_vec, "_", parents_vec)

# p-values NOT adjusted:
fs_vs_age_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $S2_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $N_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $E_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $S1_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $S2_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $N_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $E_8
## `geom_smooth()` using formula 'y ~ x'

FS vs Days from Symptom Onset to Visit

fs_vs_days_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot, xaxis="Days_Symptom_Onset_to_Visit_1")
names(fs_vs_days_plot_list) <- paste0(stim_vec, "_", parents_vec)

fs_vs_days_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $S2_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $N_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $E_4
## `geom_smooth()` using formula 'y ~ x'

## 
## $S1_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $S2_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $N_8
## `geom_smooth()` using formula 'y ~ x'

## 
## $E_8
## `geom_smooth()` using formula 'y ~ x'

FS vs Sex

StimSexColors <- list("S1" = c("F" = "#bd0264ac", "M" = "#fd2898ac"),
                      "S2" = c("F" = "#fca654ac", "M" = "#fedab8ac"),
                      "N" = c("F" = "#a38dc2ac", "M" = "#d9cfe6ac"),
                      "E" = c("F" = "#d4d41bac", "M" = "#ecec69ac"))
StimSexOutlineColors <- list("S1" = c("F" = "#bd0264ff", "M" = "#fd2898ff"),
                      "S2" = c("F" = "#fca654ff", "M" = "#fedab8ff"),
                      "N" = c("F" = "#a38dc2ff", "M" = "#d9cfe6ff"),
                      "E" = c("F" = "#d4d41bff", "M" = "#ecec69ff"))
fs_vs_x_plot_categorical <- function(current_parent, current_stim, xaxis="Sex",
                                     current_fill_colors=StimSexColors[[current_stim]],
                                     current_outline_colors = StimSexOutlineColors[[current_stim]],
                                     xcategorylabs = NULL) {
  # current_parent <- "4"; current_stim <- "S1"; xaxis <- "Sex"; current_colors <- StimSexColors[[current_stim]]
  # CohortColors <- c("Hospitalized" = "#386cb0", "Non-hospitalized" = "#7fc97f", "Healthy" = "#ebeef0")
  # CohortLabs <- c("Non-hospitalized" = "NH", "Hospitalized" = "H")
  # current_parent <- "8"; current_stim <- "E"; xaxis <- "Cohort"; current_colors <- CohortColors; xcategorylabs <- CohortLabs
 
  current_data <- fs_pfs_df_with_manifest %>% 
    dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
  
  current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
    theme_bw(base_size = 22) +
    geom_violin(aes(fill = !!as.symbol(xaxis)), draw_quantiles = c(0.5),
                # All violins have the same maximum width. Overrides default of equal area
                scale="width", width = 0.6) +
    geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
                scale="width", width = 0.6) +
    geom_violin(aes(color=!!as.symbol(xaxis)), fill="transparent",
              scale="width", width=0.6, size=1.1) +
    geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=20),
          axis.text.y = element_text(color="black", size=17),
          axis.text.x = element_text(color="black", size=20),
          plot.title = element_text(hjust = 0.5, size=21),
          panel.grid = element_blank(),
          legend.position = "none",
          plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
    labs(title = current_stim) + #,
         # x = sub("([A-Za-z]*)_.*", "\\1", xaxis)) +
    scale_fill_manual(values = current_fill_colors) +
    scale_color_manual(values = current_outline_colors)
  
  wilcox_result <- wilcox.test(as.formula(sprintf("FS ~ %s", xaxis)), data = current_data)
  p.text <- if(wilcox_result$p.value < 0.001) {
    "p<.001"
  } else { 
    paste0("p=", sub("0.", ".", round(wilcox_result$p.value, 3)))
  }
  y_range <- ggplot_build(current_plot)$layout$panel_params[[1]]$y.range
  current_plot <- current_plot +
    coord_cartesian(ylim = c(NA, y_range[[2]] + 0.07*diff(y_range))) +
    annotate("text", x = 1.5, y = y_range[[2]] + 0.01*diff(y_range), label = p.text, size=5.5)
  if(!is.null(xcategorylabs)) {
    current_plot <- current_plot +
      scale_x_discrete(labels=xcategorylabs)
  }
  current_plot
}

parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_sex_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00798326771653543, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 4.3503937007874e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.0078740157480315, :
## cannot compute exact p-value with ties
names(fs_vs_sex_plot_list) <- paste0(stim_vec, "_", parents_vec)

fs_vs_sex_plot_list
## $S1_4

## 
## $S2_4

## 
## $N_4

## 
## $E_4

## 
## $S1_8

## 
## $S2_8

## 
## $N_8

## 
## $E_8

In many of the boxplots above, Males are shifted slightly higher on the FS/PFS axis compared to Females, but none of the comparisons are significant.

Hosp vs Non-Hosp

  1. Plot the COMPASS heatmaps again, this time including row annotations for Cohort
  2. Compare FS and PFS between Hosp and Non-Hosp

COMPASS Heatmaps

Look at the heatmaps again. Also focus on IFNg

Stack the plots from the different stims, this time with IFNg subsets on the RHS
A commonality to the subsets which are enriched in Hospitalized individuals is CD154.

cytokine_annotation_colors <- rep("black", 30)
cytokine_order_for_annotation = c("CD154", "IL2", "TNFa", "CD107a", "IL4/5/13", "IL17a", "IFNg")
# Adapt code from COMPASS::plotCompassResultStack to prepare the data for plotting
# First we use mergeMatricesForPlotCOMPASSResultStack to merge the data from the different runs together.
# Then use the merged data to create a fake merged COMPASSResult object with the merged categories, metadata, and mean_gamma data frames
# This should allow us to use some of the customized options in plot.COMPASSResult.ComplexHeatmap (e.g. to get the IFNg subsets on the RHS)
mergeMatricesForPlotCOMPASSResultStack_tmp <- utils::getFromNamespace("mergeMatricesForPlotCOMPASSResultStack", "COMPASS")

CohortColors_forHeatmap <- c("Conv Hosp" = "#707ed4", "Conv Non-Hosp" = "#c1ddd7", "Assay Controls" = "#ebeef0")
StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "NCAP" = "#beaed4", "VEMP" = "#e6e63c") # https://colorbrewer2.org/?type=qualitative&scheme=Accent&n=6
row_ann_colors_v2 <- list(`Stim` = StimColors, `Cohort` = CohortColors_forHeatmap)

# CD4 runs
cd4_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD4RunNames],
                                   threshold=0.01,
                                   minimum_dof=1,
                                   maximum_dof=Inf,
                                   row_annotation = c("Stim", "Cohort"),
                                   variable = "Stim")
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
if(save_output) {
  saveRDS(cd4_data2plot, here::here("processed_data/20200815_Merged_CD4_COMPASS_Data.rds"))
}
cd4_merged_COMPASSResult <- crList2$`4_Spike_1`
cd4_merged_COMPASSResult$fit$mean_gamma <- cd4_data2plot$MMerged
cd4_merged_COMPASSResult$fit$categories <- cd4_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd4_merged_COMPASSResult$fit$categories) <- rownames(cd4_data2plot$catsMerged)
cd4_merged_COMPASSResult$data$meta <- cd4_data2plot$rowannMerged
cd4_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd4_merged_COMPASSResult$data$meta)
cd4_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^4_", "", cd4_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd4_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd4_merged_COMPASSResult$data$meta$Cohort,
                                                           "Hospitalized" = "Conv Hosp",
                                                           "Non-hospitalized" = "Conv Non-Hosp",
                                                           "Healthy" = "Assay Controls"),
                                                    levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd4_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"

cd4_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
                                  row_annotation = c("Stim", "Cohort"),
                                  cytokine_order_for_annotation = cytokine_order_for_annotation,
                                  dichotomize_by_cytokine = "IFNg",
                                  dichotomize_by_cytokine_color = "#999999",
                                  row_annotation_colors = row_ann_colors_v2,
                                  staircase_cytokine_annotation = TRUE)
draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)

cd4_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
                                  row_annotation = c("Stim", "Cohort"),
                                  cytokine_order_for_annotation = cytokine_order_for_annotation,
                                  dichotomize_by_cytokine = "IFNg",
                                  dichotomize_by_cytokine_color = "#999999",
                                  row_annotation_colors = row_ann_colors_v2,
                                  staircase_cytokine_annotation = TRUE,
                                  row_gap = unit(0.05, "in"))
# draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)

# CD8 runs
cd8_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD8RunNames],
                                   threshold=0.01,
                                   minimum_dof=1,
                                   maximum_dof=Inf,
                                   row_annotation = c("Stim", "Cohort"),
                                   variable = "Stim")
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
if(save_output) {
  saveRDS(cd8_data2plot, here::here("processed_data/20200815_Merged_CD8_COMPASS_Data.rds"))
}
cd8_merged_COMPASSResult <- crList2$`8_Spike_1`
cd8_merged_COMPASSResult$fit$mean_gamma <- cd8_data2plot$MMerged
cd8_merged_COMPASSResult$fit$categories <- cd8_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd8_merged_COMPASSResult$fit$categories) <- rownames(cd8_data2plot$catsMerged)
cd8_merged_COMPASSResult$data$meta <- cd8_data2plot$rowannMerged
cd8_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd8_merged_COMPASSResult$data$meta)
cd8_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^8_", "", cd8_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd8_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd8_merged_COMPASSResult$data$meta$Cohort,
                                                           "Hospitalized" = "Conv Hosp",
                                                           "Non-hospitalized" = "Conv Non-Hosp",
                                                           "Healthy" = "Assay Controls"),
                                                    levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd8_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"

cd8_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
                                  row_annotation = c("Stim", "Cohort"),
                                  cytokine_order_for_annotation = cytokine_order_for_annotation,
                                  dichotomize_by_cytokine = "IFNg",
                                  dichotomize_by_cytokine_color = "#999999",
                                  row_annotation_colors = row_ann_colors_v2,
                                  staircase_cytokine_annotation = TRUE)
draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)

cd8_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
                                  row_annotation = c("Stim", "Cohort"),
                                  cytokine_order_for_annotation = cytokine_order_for_annotation,
                                  dichotomize_by_cytokine = "IFNg",
                                  dichotomize_by_cytokine_color = "#999999",
                                  row_annotation_colors = row_ann_colors_v2,
                                  staircase_cytokine_annotation = TRUE,
                                  row_gap = unit(0.05, "in"))
# draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
if(save_output) {
  pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap.pdf"), width=9, height=8,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap.pdf"), width=5, height=8,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
  draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
  dev.off()
  
  # With row gaps
  pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap_gap.pdf"), width=9, height=9,
      onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
  draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap_gap.pdf"), width=5, height=9,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
  draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
  dev.off()
  
  # Later run in linux terminal:
  # convert -density 300 20200902_CD4_COMPASS_Heatmap_Edit.pdf -quality 90 20200902_CD4_COMPASS_Heatmap_Edit.png
}

FS/PFS vs Cohort

CohortColors <- c("Hospitalized" = "#757bbcb2", "Non-hospitalized" = "#b0d2c8bf")
CohortOutlineColors <- c("Hospitalized" = "#757bbbff", "Non-hospitalized" = "#b0d1c8ff")
CohortLabs <- c("Hospitalized" = "H", "Non-hospitalized" = "NH")

parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_cohort_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical,
                               xaxis = "Cohort", current_fill_colors = CohortColors,
                               current_outline_colors = CohortOutlineColors, xcategorylabs = CohortLabs)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00870807086614173, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 0.00426889763779528, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.00848228346456693, :
## cannot compute exact p-value with ties
names(fs_vs_cohort_plot_list) <- paste0(stim_vec, "_", parents_vec)

fs_vs_cohort_plot_list
## $S1_4

## 
## $S2_4

## 
## $N_4

## 
## $E_4

## 
## $S1_8

## 
## $S2_8

## 
## $N_8

## 
## $E_8
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values

# FS vs demographics plots to disk

if(save_output) {
  
  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4CDEF_CD4_FS_vs_Demographics.pdf"), width=14, height=14,
      onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  (fs_vs_age_plot_list$S1_4 | fs_vs_age_plot_list$S2_4 | fs_vs_age_plot_list$N_4 | fs_vs_age_plot_list$E_4)  /
    (fs_vs_sex_plot_list$S1_4 | fs_vs_sex_plot_list$S2_4 | fs_vs_sex_plot_list$N_4 | fs_vs_sex_plot_list$E_4) /
    (fs_vs_days_plot_list$S1_4 | fs_vs_days_plot_list$S2_4 | fs_vs_days_plot_list$N_4 | fs_vs_days_plot_list$E_4) /
    (fs_vs_cohort_plot_list$S1_4 | fs_vs_cohort_plot_list$S2_4 | fs_vs_cohort_plot_list$N_4 | fs_vs_cohort_plot_list$E_4)
  dev.off()  
  
  pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5HIJK_CD8_FS_vs_Demographics.pdf"), width=14, height=14,
    onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  (fs_vs_age_plot_list$S1_8 | fs_vs_age_plot_list$S2_8 | fs_vs_age_plot_list$N_8 | fs_vs_age_plot_list$E_8)  /
    (fs_vs_sex_plot_list$S1_8 | fs_vs_sex_plot_list$S2_8 | fs_vs_sex_plot_list$N_8 | fs_vs_sex_plot_list$E_8) /
    (fs_vs_days_plot_list$S1_8 | fs_vs_days_plot_list$S2_8 | fs_vs_days_plot_list$N_8 | fs_vs_days_plot_list$E_8) /
    (fs_vs_cohort_plot_list$S1_8 | fs_vs_cohort_plot_list$S2_8 | fs_vs_cohort_plot_list$N_8 | fs_vs_cohort_plot_list$E_8)
  dev.off()  
}

Background-corrected magnitudes

source(here::here("scripts/20200604_Helper_Functions.R"))

# Arial is used by dotplot function below

# Note p-values are bonferroni adjusted in dotplots
# Also note that some points are not shown in order to zoom in better to the bulk of the point
bg_corr_dotplots <- pmap(.l = list(c(CD4RunNames, CD8RunNames),
                                   rep(c("CD4+", "CD8"), each = 4),
                                   list(NULL, c(0, 0.0085), NULL, NULL,
                                        NULL, NULL, c(0, 0.005), NULL),
                                   c(5, 5, 5, 5,
                                     5, 5, 5, 5)),
                         .f = function(n, p, y, p_text_size) {
                           # "staircase effect" for categories df heatmap gets done automatically, unlike in heatmap
                           # Returned dotplot is cowplot
                           make_dotplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p,
                                                        return_output = T, current_ylim = y, include_0_line=T, 
                                                        cytokine_order_for_annotation=cytokine_order_for_annotation,
                                                        p_text_size=p_text_size, group_by_colors=CohortColors,
                                                        zeroed_BgCorr_stats = FALSE, zeroed_BgCorr_plot = TRUE,
                                                        point_size = 1.2, dichotomize_by_cytokine = "IFNg")
                         })
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
names(bg_corr_dotplots) <- c(CD4RunNames, CD8RunNames)

if(save_output) {
  # Save the output of make_dotplot_for_COMPASS_run to disk so we can use the extracted data for follow-up analysis
  saveRDS(bg_corr_dotplots, here::here("processed_data/20200824_make_dotplot_for_COMPASS_run_list.rds"))
}

for(n in names(bg_corr_dotplots)) {
  print(plot_grid(ggplot() +
    theme_void() +
    geom_text(aes(0,0,label=sprintf("CD%s %s", sub("([48]).*", "\\1", n), sub("_", " ", sub("[48]_(.*)", "\\1", n)))), size=10) +
    xlab(NULL),
    bg_corr_dotplots[[n]]$Dotplot,
    ncol=1, rel_heights = c(0.1, 1)))
}

# How many points are out of bounds of the plot for plots where boundaries were specified?
bg_corr_dotplots$`4_Spike_2`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.0085)) %>%
  select(-Individual, -Cohort) %>% 
  select_if(~max(.) > 0.0085)
## # A tibble: 1 x 1
##   `!IL2&IL4/5/13&!IFNg&!TNFa&!IL17a&!CD154&!CD107a`
##                                               <dbl>
## 1                                            0.0158
bg_corr_dotplots$`8_NCAP`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.005)) %>%
  select(-Individual, -Cohort) %>% 
  select_if(~max(.) > 0.005)
## # A tibble: 1 x 3
##   `!IL2&!IL4/5/13&IFNg&!TNF… `!IL2&!IL4/5/13&IFNg&TNF… `!IL2&!IL4/5/13&IFNg&TNF…
##                        <dbl>                     <dbl>                     <dbl>
## 1                     0.0105                    0.0129                   0.00746
if(save_output) {
  pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5A_CD4_S1_COMPASS_Magnitudes_vs_Cohort.pdf"), width=12, height=7,
    onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  bg_corr_dotplots[["4_Spike_1"]]$Dotplot
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5B_CD4_S2_COMPASS_Magnitudes_vs_Cohort.pdf"), width=11.5, height=7,
    onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  bg_corr_dotplots[["4_Spike_2"]]$Dotplot
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5C_CD4_N_COMPASS_Magnitudes_vs_Cohort.pdf"), width=14, height=7,
    onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  bg_corr_dotplots[["4_NCAP"]]$Dotplot
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5D_CD4_E_COMPASS_Magnitudes_vs_Cohort.pdf"), width=3, height=7,
    onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
  bg_corr_dotplots[["4_VEMP"]]$Dotplot
  dev.off()
}
sig_tests_tally <- lapply(names(bg_corr_dotplots), function(n) {length(which(bg_corr_dotplots[[n]]$Test_Results$p.adj < 0.05))})
names(sig_tests_tally) <- names(bg_corr_dotplots)
sig_tests_tally # make sure all significant comparisons were plotted and accounted for
## $`4_Spike_1`
## [1] 9
## 
## $`4_Spike_2`
## [1] 4
## 
## $`4_NCAP`
## [1] 5
## 
## $`4_VEMP`
## [1] 0
## 
## $`8_Spike_1`
## [1] 0
## 
## $`8_Spike_2`
## [1] 0
## 
## $`8_NCAP`
## [1] 0
## 
## $`8_VEMP`
## [1] 0

Save to disk: background-corrected magnitudes from subsets which were significantly different across hospitalization status. For integrated analysis with other T cell and Ab data.

if(save_output) {
  signif_bgcorr_dat_list <- lapply(c(CD4RunNames, CD8RunNames), function(runName) {
    signif_subset_names <- bg_corr_dotplots[[runName]]$Test_Results %>% dplyr::filter(p.adj < 0.05) %>% dplyr::pull(BooleanSubset)
    bg_corr_dotplots[[runName]]$BgCorrMagnitudes %>% 
      rename("SAMPLE_ID" = "Individual") %>% 
      dplyr::select(c("SAMPLE_ID", signif_subset_names)) %>% 
      rename_at(vars(signif_subset_names), ~ paste("CD", sub("Spike_", "S", runName), " ", ., sep = ""))
  })
  names(signif_bgcorr_dat_list) <- c(CD4RunNames, CD8RunNames)

  bgcorr_dat_2save <- purrr::reduce(signif_bgcorr_dat_list, full_join, by = "SAMPLE_ID") %>% 
    # Convert proportions into percents
    # Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
    mutate_at(vars(-"SAMPLE_ID"), ~ format(. * 100, digits = 20))

  write.csv(bgcorr_dat_2save, here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), row.names = F)
  # bgcorr_dat_2save <- read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F)
  # all.equal(bgcorr_dat_2save %>% mutate_at(vars(-"SAMPLE_ID"), as.numeric),
  #  read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F))
}